Data import

QA band filtering

Read QA band

filtering function

# high confidence clouds or high confidence cloud shadows or fill values
mask_medconf <- function(x){
  # use base R function to convert numeric into bits
  bs <- intToBits(x)
  if ( ((bs[1]) | # cloud
       # (bs[2]) | # low cloud confidence
        (bs[3]) | # shadow
        (bs[4]) | # snow
        (bs[5]) | # water
        (bs[6]) | # aerosol (only low quality)
        (bs[8]) | # subzero
        (bs[9]) | # saturation
        (bs[10]) | # illumination
        (bs[11]) | # saturation
        (bs[13]) | # slope
        (bs[14])) # vapor
       == 1){
    return("flag") } else {
      return("valid")
    }
}



# due to the nature of the intToBits function it is not possible to perform 
# this step in a mutate pipe. Therefor a vector is created via lapply, 
# joined to then joined to the fs_LND df and finally flaged pixels can be removed
mask_idex <- lapply(raw_fs$QAI, mask_medconf) %>% unlist()

fs_LND_filtered <- raw_fs %>%
  mutate(QAI = mask_idex) %>% 
  filter(QAI == "valid")

Quite important to note here is that while the absolute number of pixels additionally filtered with more strict conditions applied is quite small, increasing these conditions did yield effective improvements in removing additional outlyers in the data (particularly evident in direct pixel scatter plot comparisons between LND05/07/08). Therefor the deliberate inclusions/exclusion of certain filtering criterion could disproportionately sway the final results of sensor comparisons and feature spaces.

Katjas method compairson

# Katja´s method to filter QAI values
raw_fs_KK <- raw_fs

raw_fs_KK$qa_cloud <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 1), 3)
raw_fs_KK$qa_shadow <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 3), 1)
raw_fs_KK$qa_snow <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 4), 1)
raw_fs_KK$qa_water <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 5), 1)
raw_fs_KK$qa_aerosol <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 6), 3)
raw_fs_KK$qa_subzero <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 8), 1)
raw_fs_KK$qa_saturation <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 9), 1)
raw_fs_KK$qa_zenith <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 10), 1)
raw_fs_KK$qa_illumination <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 11), 3)
raw_fs_KK$qa_slope <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 13), 1)
raw_fs_KK$qa_vapor <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 14), 1)


raw_fs_KK$quality <- 0
raw_fs_KK$quality[raw_fs_KK$qa_cloud != 0 |raw_fs_KK$qa_snow != 0 |raw_fs_KK$qa_shadow != 0] <- 1
raw_fs_KK <- raw_fs_KK[raw_fs_KK$quality == 0, ]
print(paste("number of observations following SHS method for QAI filtering:", round(nrow(fs_LND_filtered)/6)) )
## [1] "number of observations following SHS method for QAI filtering: 35109"
print(paste("number of observations following KK method QAI filtering:", round(nrow(raw_fs_KK)/6)) )
## [1] "number of observations following KK method QAI filtering: 74693"
print(paste("The difference in included observation:", round(nrow(fs_LND_filtered)/nrow(raw_fs_KK), digits = 2), "% (n=",  (nrow(fs_LND_filtered)-nrow(raw_fs_KK)),  ")") )
## [1] "The difference in included observation: 0.47 % (n= -237504 )"

As seen above the differences in the filtering methods and parameterization is very marginal, at ~1% difference in amount of included observations

##Additional data filtering and auxilirary varaible and indicity computation

fs_LND <- fs_LND_filtered %>% 
  # reduce file size for testing
  #sample_n(10000) %>% 
  # filter out extreme values
  filter(across(BLUE:SWIR2, ~ . > 0),
         across(BLUE:SWIR2, ~ . < 10000)) %>% 
  mutate(date   = as.Date(str_sub(scene, 1, 9),format = "%Y%m%d"),
         month  = lubridate::month(date),
         year   = lubridate::year(date), 
         month_year = paste0(month,"_",year),
         doy    = lubridate::yday(date),
         sensor = str_sub(scene, -5, -1),
         SWIR_ratio = SWIR2/SWIR1,
         NDVI   = ((NIR-RED)/(NIR+RED)),
         NDTI   = ((SWIR1-SWIR2)/(SWIR1+SWIR2))) # source: https://www.mdpi.com/2072-4292/10/10/1657/htm
## Warning: Using `across()` in `filter()` is deprecated, use `if_any()` or
## `if_all()`.

## Warning: Using `across()` in `filter()` is deprecated, use `if_any()` or
## `if_all()`.

spectra vis

NDVI time series

ggplot(fs_LND, aes(doy, NDVI, color=year, group=year)) +
  geom_smooth() +
  scale_colour_viridis_c(option = "D") +
  theme_minimal()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Here it can be seen that over time, average NDVI maximum values tend to increase over time, particularly early in the year before the growing season. Over such a long time frame it is possible though that this chance could be attributed to more irrigation, climate change, or LULC rather been strictly being resultant of deviations in sensor spec?

NDTI time series

ggplot(na.omit(fs_LND), aes(doy, NDTI, color=year, group=year)) +
  geom_smooth() +
  scale_colour_viridis_c(option = "D") +
  theme_minimal()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Similar story here, with NDTI decreasing over time. NDTI is also negatively associated with tillage, so the dip in the growing season is explained by more bare soil (tilled) locations being progressively covered by PV. Sudden decreases during the growing season indicate a tilling event, where as a slow decline at the end of the growing season are more likely to indicate PV senescence.

data wrangel into long

fs_LND_long <- fs_LND %>% 
  pivot_longer(cols=c("BLUE":"SWIR2"),names_to = "wavelength", values_to = "reflectance") %>% 
  as.data.frame()  %>% 
    mutate(wavelength_num = case_when(wavelength == "BLUE" ~ 482,
                                      wavelength == "GREEN" ~ 562,
                                      wavelength == "RED" ~ 655,
                                      wavelength == "NIR" ~ 865,
                                      wavelength == "SWIR1" ~ 1610,
                                      wavelength == "SWIR2" ~ 2200),
            reflectance = reflectance/100) # scale to percent

simple mean reflectance over time

ggplot(fs_LND_long, aes(wavelength_num, reflectance, color=year, group=year)) +
   # geom_smooth(formula = y ~ s(x, bs = "cs", k=6)) +
   stat_summary(fun=mean, geom="line", size = 1) + # draw a mean line in the data
  scale_colour_viridis_c(option = "D") +
  theme_minimal()

When plotting a simple mean of spectra across years it can be seen that more recent years dont defaco have higher reflectance values across the entire electromagnetic spectrum

boxplot

ggplot(fs_LND_long, aes(wavelength, reflectance, color=sensor)) +
 # geom_jitter() +
  geom_boxplot() +
  scale_colour_viridis_d(option = "D") +
  theme_minimal()

Looks like lots of extreme values (even post QAI filter), but when looking at the next density plot section, you can that relatively speaking, these extreme outlying values are exceedinly rare and marginal.

density

# facet wrap by wavelength
ggplot(fs_LND_long, aes(reflectance, color=sensor, fill=sensor)) +
  geom_density(alpha = 0.05) +
  #geom_jitter() +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  scale_x_continuous(expand = c(0, 0)) +
  #scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
  theme_minimal() +
  guides(col = guide_legend(nrow = 3))+
  theme(legend.position = "bottom")  +
  facet_wrap(~wavelength)

# facet wrap by sensor
ggplot(fs_LND_long, aes(reflectance, color=wavelength, color=wavelength)) +
  geom_density(alpha = 0.05) +
  #geom_jitter() +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  scale_x_continuous(expand = c(0, 0)) +
  #scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
  theme_minimal() +
  guides(col = guide_legend(nrow = 3))+
  theme(legend.position = "bottom")  +
  facet_wrap(~sensor)
## Warning: Duplicated aesthetics after name standardisation: colour

ridges

ggplot(fs_LND_long, aes(x = reflectance, y = sensor, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 1.2, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "reflectance", option = "D") +
  scale_x_continuous(expand = c(0, 0), limits = c(0, 75)) +
  theme_minimal() +
  facet_wrap(~wavelength)
## Picking joint bandwidth of 0.257
## Picking joint bandwidth of 0.276
## Picking joint bandwidth of 1.36
## Picking joint bandwidth of 0.416
## Picking joint bandwidth of 0.638
## Picking joint bandwidth of 0.51

ggplot(fs_LND_long, aes(x = reflectance, y = wavelength, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "reflectance", option = "D") +
  scale_x_continuous(expand = c(0, 0), limits = c(0, 75)) +
  theme_minimal() +
  facet_wrap(~sensor)
## Picking joint bandwidth of 0.785
## Picking joint bandwidth of 0.397
## Picking joint bandwidth of 0.396
## Picking joint bandwidth of 0.456
## Picking joint bandwidth of 0.845

feature space vis

import reference data

reference_spectra <- read.csv("data/feature_space/sli_gen_dark_soils_0p4.csv",
                              encoding = "UTF-8") %>% 
  mutate(SWIR_ratio = SWIR2/SWIR1,
         NDVI   = ((NIR-RED)/(NIR+RED)),
         NDTI   = ((SWIR1-SWIR2)/(SWIR1+SWIR2)))

ggplot(reference_spectra, aes(NDVI, SWIR_ratio, color = cover)) +
  geom_point() +
  scale_colour_viridis_d(option = "D") +
  theme_minimal() 

plot complete data

# No dissagregation
ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 300) +
  geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2))
## Warning: Removed 7 rows containing non-finite values (stat_binhex).

Here we can see that there are two dense zones in the spectral feature spaces. The smaller and fainter zone is predominately populated by landsat 7 pixels (clearly seen in the following plot)

facet by sensor

ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 200) +
  geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
  facet_wrap(~sensor)
## Warning: Removed 7 rows containing non-finite values (stat_binhex).

Here we can see that the spectral library fits particularly well to Landsat 8 (probably do to the fact that is was created using lansat 8 pixels?).

facet by month

ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 100) +
  geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
  facet_wrap(~month)
## Warning: Removed 7 rows containing non-finite values (stat_binhex).

Here we can also see that across all sensors, the spectral library seems to triangulate the data points better during growing season months.

facet by year

Displayed with only point per reference class to improve interpretability

1985-2000

# FROM 1985-2000
fs_LND |> filter(year<2001) %>%
ggplot(., aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 100) +
  geom_point(data=reference_spectra[c(1,3,25),], aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
  facet_wrap(~year)

2001-2022

# FROM 2001-2022
fs_LND |> filter(year>2000) %>%
ggplot(., aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 100) +
  geom_point(data=reference_spectra[c(1,3,25),], aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
  facet_wrap(~year)

NDTI feature space

ggplot(fs_LND, aes(NDVI, NDTI)) +
  geom_point() +
  stat_bin_hex(bins = 250) +
  geom_point(data=reference_spectra, aes(NDVI, NDTI), color ="red") +
  scale_fill_continuous(type = "viridis") +
   scale_x_continuous(expand = c(0, 0), limits = c(-0.1, 1)) +
   scale_y_continuous(expand = c(0, 0), limits = c(-0.25, 0.8)) +
  theme_minimal()# +
## Warning: Removed 5 rows containing non-finite values (stat_binhex).
## Warning: Removed 5 rows containing missing values (geom_point).

#  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
#  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) 

facet by sensor

ggplot(fs_LND, aes(NDVI, NDTI)) +
  stat_bin_hex(bins = 200) +
  geom_point(data=reference_spectra, aes(NDVI, NDTI), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(-0.25, 1.2)) +
  facet_wrap(~sensor)
## Warning: Removed 3 rows containing non-finite values (stat_binhex).

facet by month

ggplot(fs_LND, aes(NDVI, NDTI)) +
  stat_bin_hex(bins = 100) +
  geom_point(data=reference_spectra, aes(NDVI, NDTI), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(-0.25, 1.2)) +
  facet_wrap(~month)
## Warning: Removed 3 rows containing non-finite values (stat_binhex).

# old computationally exorbinant method of and plotting density plots 

# # Get density of points in 2 dimensions.
# # @param x A numeric vector.
# # @param y A numeric vector.
# # @param n Create a square n by n grid to compute density.
# # @return The density within each square.
# get_density <- function(x, y, ...) {
#   dens <- MASS::kde2d(x, y, ...)
#   ix <- findInterval(x, dens$x)
#   iy <- findInterval(y, dens$y)
#   ii <- cbind(ix, iy)
#   return(dens$z[ii])
# }
# 
# fs_density <- as.data.frame(fs_LND) %>%
#   filter(SWIR_ratio != "Inf") %>% 
#   mutate(density = get_density((.)$NDVI, (.)$SWIR_ratio, n = 500)) 

# ggplot(fs_density, aes(NDVI, SWIR_ratio, color = density)) +
#   geom_point() +
#   geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
#   scale_colour_viridis_c(option = "D") +
#   theme_minimal() +
#   scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
#   scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
#   facet_wrap(~sensor)

single location band compairison

Overview of observations over time by sensor

Temporal overvire and year resolution

fs_LND |>  
  group_by( year, sensor) %>% 
  summarise(n_observation = n()) %>%  
  
ggplot(., aes(year, n_observation, color=sensor)) +
  geom_line() +
  scale_fill_continuous(type = "viridis") +
  scale_colour_viridis_d(option = "D") +
  theme_minimal() 
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

Temporal overview at month resolution

fs_LND_obs_overview <- fs_LND %>% 
  group_by(month, year, sensor) %>% 
  summarise(n_observation = n()) %>%
  mutate(timestamp = zoo::as.yearmon(paste0(month,"_", year), format="%m_%Y"))
## `summarise()` has grouped output by 'month', 'year'. You can override using the
## `.groups` argument.

Monthly time series

ggplot(fs_LND_obs_overview, aes(timestamp, n_observation, color=sensor)) +
  geom_line(size=1) +
  #geom_smooth( ) +
  scale_fill_continuous(type = "viridis") +
  scale_colour_viridis_d(option = "D") +
  scale_x_continuous(breaks=seq(1984,2022,2)) +
  theme_minimal()

The acute fluctuations in observation numbers seems to stem from the fact that during winter months there is a lower observation density (plotted below)

observation density by month

(same data with some different vis options)

ggplot(fs_LND_obs_overview, aes(x=month, y=n_observation, group=month,fill=sensor, color=sensor)) +
 # geom_boxplot(alpha=0.3, notch = T) +
  geom_violin(alpha=0.3) +
  geom_jitter(alpha=0.3) +
 #geom_density_ridges_gradient(scale = 1.2, rel_min_height = 0.01) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  theme_minimal() +
  scale_x_continuous(breaks=seq(1,12,1)) +
  scale_y_continuous(limits = c(0, 1000)) +
  facet_wrap(~sensor)

ggplot(fs_LND_obs_overview, aes(x=month, y=n_observation, group=month,fill=sensor, color=sensor)) +
  geom_boxplot(alpha=0.3) +
  geom_jitter(alpha=0.3) +
 #geom_density_ridges_gradient(scale = 1.2, rel_min_height = 0.01) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  theme_minimal() +
  scale_x_continuous(breaks=seq(1,12,1)) +
  scale_y_continuous(limits = c(0, 1000)) +
  facet_wrap(~sensor)

ggplot(fs_LND_obs_overview, aes(x=month, y=n_observation, group=month,fill=sensor, color=n_observation)) +
 geom_boxplot(alpha=0.5) +
 # geom_violin(alpha=0.3) +
  geom_jitter(alpha=0.9) +
 #geom_density_ridges_gradient(scale = 1.2, rel_min_height = 0.01) +
  scale_colour_viridis_c(option = "D") +
  scale_fill_viridis_d(option = "D") +
  theme_minimal() +
  scale_x_continuous(breaks=seq(1,12,1)) +
  scale_y_continuous(limits = c(0, 1000)) +
  facet_wrap(~sensor)

The density of observations in summer months are far greater (although more variable), While during winter months are seem to be constantly few observations. This trend is stable across sensors, so mostly likely can largely attributed to increased cloud/snow cover during winter months.

Exact matches of location and loose match time of observation

fs_LND_loose <- fs_LND_long %>% 
  # filter(sensor %in% c("LND07", "LND08")) %>% 
  mutate(obs_time_place = paste0(plyr::round_any(doy, 2, f = floor),"_", year,"_",POINT_ID)) %>% 
  group_by(obs_time_place) %>% 
  filter(n() > 6)

Total number of observations that match the loose condition of being within ten days of each other: 2.8334^{4}

Days of year with available observations: 2, 3, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 30, 31, 36, 37, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 330, 331, 332, 333, 334, 335, 338, 339, 340, 341, 342, 343, 348, 349, 350, 351, 354, 355, 356, 357, 362, 363

Sample compairison of LND07 and LND08 spectra

for (i in 1:4) {
  
  scens <- unique(fs_LND_loose$obs_time_place)[c((((i-1)*20)+1):((i)*20))]
  
  fs_LND_loose_subset <- fs_LND_loose %>% 
    filter(obs_time_place %in% scens) |> 
    filter(sensor %in% c("LND07", "LND08")) 
  
  print(ggplot( fs_LND_loose_subset, aes(wavelength_num, reflectance, color=sensor, fill=sensor)) +
    # geom_density(alpha = 0.05) +
    geom_line() +
   # scale_colour_viridis_d(option = "D") +
    #scale_fill_viridis_d(option = "D") +
    scale_x_continuous(expand = c(0, 0)) +
    #scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
    theme_minimal() +
    guides(col = guide_legend(nrow = 3))+
     theme(legend.position = "bottom", 
           axis.text.y = element_text(angle = 45),
           strip.text.y = element_text(size = 8, angle = 330))  +
    facet_wrap( ~obs_time_place) )
  }

scatterplots

fs_LND_scatter <- fs_LND_loose |> 
  select(obs_time_place,sensor,wavelength,reflectance) |> 
  pivot_wider(names_from = sensor, values_from = c(reflectance)) |> 
  rename(reflectance_LND04 = LND04,
         reflectance_LND05 = LND05,
         reflectance_LND07 = LND07,
         reflectance_LND08  = LND08,
         reflectance_LND09= LND09)

Scatterplot of Landsat 7 and 8 specta

my.formula <- y ~ x

fs_LND_scatter_LND_7_8 <- fs_LND_scatter |> select(-reflectance_LND04,-reflectance_LND05, -reflectance_LND09) |> 
  na.omit()

ggplot(fs_LND_scatter_LND_7_8,aes(reflectance_LND07, reflectance_LND08)) +
  stat_bin_hex(bins = 100) + 
  stat_smooth(method = "lm", formula = my.formula, color="red") +
  stat_poly_eq(formula = my.formula, 
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
               parse = TRUE,
               label.x =1.2,
               label.y =1.05) +
  geom_abline(intercept=1,slope=1, linetype=2) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  annotate("text", x=7, y=105,
           label= paste0("R2 =",round((cor((fs_LND_scatter_LND_7_8$reflectance_LND07), 
                                           (fs_LND_scatter_LND_7_8$reflectance_LND08),
                                           use="complete.obs")^2),2)), size=4.5, hjust=0) + 
  
  annotate("text", x= 7, y=100, hjust=0, size=4.5,
           label= paste0("RMSE ==",round(sqrt(mean(((fs_LND_scatter_LND_7_8$reflectance_LND07)-
                                                      (fs_LND_scatter_LND_7_8$reflectance_LND08))^2,
                                                   na.rm=TRUE)),2)), parse=TRUE)+
  
  annotate("text", x=7, y= 95, size=4.5, hjust=0,
           label=paste0("Bias = ", round((mean((fs_LND_scatter_LND_7_8$reflectance_LND07), na.rm=TRUE) -
                                            mean((fs_LND_scatter_LND_7_8$reflectance_LND08),na.rm=TRUE)),2)))+
  
  annotate("text", x=7, y=90,size=4.5,hjust=0, 
           label=paste0("MAE = ", round(mean(abs((fs_LND_scatter_LND_7_8$reflectance_LND07)-                                          
                                                   (fs_LND_scatter_LND_7_8$reflectance_LND08))),2)))

compairson by wavelength

ggplot(fs_LND_scatter, aes(reflectance_LND07, reflectance_LND08)) +
  stat_bin_hex(bins = 100) +
  stat_smooth(method = "lm", formula = my.formula, color="red") +
  stat_poly_eq(formula = my.formula, 
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
               parse = TRUE,
               label.x =1.2,
               label.y =1.05) +
  geom_abline(intercept=1,slope=1, linetype=2) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  facet_wrap(~wavelength, 
             scales="free"
  ) 
## Warning: Removed 45534 rows containing non-finite values (stat_binhex).
## Warning: Removed 45534 rows containing non-finite values (stat_smooth).
## Warning: Removed 45534 rows containing non-finite values (stat_poly_eq).

kable(
fs_LND_scatter |> 
  select(-reflectance_LND04,-reflectance_LND05,-reflectance_LND09) |> 
  group_by(wavelength) |> 
  summarise(R2   = round((cor((reflectance_LND07), 
                            (reflectance_LND08),
                            use="complete.obs")^2),2),
            
            RMSE = round(sqrt(mean(((reflectance_LND07)-
                                      (reflectance_LND08))^2, na.rm=TRUE)),2),
            
            Bias = round((mean((reflectance_LND07), na.rm=TRUE) -
                            mean((reflectance_LND08), na.rm=TRUE)),2),
            
            MAE = round(mean(abs((reflectance_LND07)-
                                   (reflectance_LND08)), na.rm=TRUE),2))
) %>%
  kable_styling(full_width = T)
wavelength R2 RMSE Bias MAE
BLUE 0.04 4.35 0.01 1.97
GREEN 0.06 4.02 0.06 1.85
NIR 0.67 6.13 -1.23 3.80
RED 0.20 3.97 0.23 1.87
SWIR1 0.45 4.23 0.74 2.61
SWIR2 0.51 3.32 -1.21 1.94

Scatterplot of Landsat 5 and 7 specta

fs_LND_scatter_LND_5_7 <- fs_LND_scatter |> select(-reflectance_LND04,-reflectance_LND08, -reflectance_LND09) |> 
  na.omit()


ggplot(fs_LND_scatter_LND_5_7, aes(reflectance_LND07, reflectance_LND05)) +
stat_bin_hex(bins = 100) +
  geom_abline(intercept=1,slope=1, linetype=2) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() + 
  stat_smooth(method = "lm", formula = my.formula, color="red") +
  stat_poly_eq(formula = my.formula, 
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
               parse = TRUE,
               label.x =1.2,
               label.y =1.05) +
  annotate("text", x=7, y=105,
           label= paste0("R2 =",round((cor((fs_LND_scatter_LND_5_7$reflectance_LND07), 
                                           (fs_LND_scatter_LND_5_7$reflectance_LND05),
                                           use="complete.obs")^2),2)), size=4.5, hjust=0) + 
  
  annotate("text", x= 7, y=100, hjust=0, size=4.5,
           label= paste0("RMSE ==",round(sqrt(mean(((fs_LND_scatter_LND_5_7$reflectance_LND07)-
                                                    (fs_LND_scatter_LND_5_7$reflectance_LND05))^2,
                                                   na.rm=TRUE)),2)), parse=TRUE)+
  
  annotate("text", x=7, y= 95, size=4.5, hjust=0,
           label=paste0("Bias = ", round((mean((fs_LND_scatter_LND_5_7$reflectance_LND07), na.rm=TRUE) -
                                          mean((fs_LND_scatter_LND_5_7$reflectance_LND05),na.rm=TRUE)),2)))+
  
  annotate("text", x=7, y=90,size=4.5,hjust=0, 
           label=paste0("MAE = ", round(mean(abs((fs_LND_scatter_LND_5_7$reflectance_LND07)-
                                                (fs_LND_scatter_LND_5_7$reflectance_LND05))),2))) 

compairson by wavelength

ggplot(fs_LND_scatter, aes(reflectance_LND07, reflectance_LND05)) +
  stat_bin_hex(bins = 100) +
  stat_smooth(method = "lm", formula = my.formula, color="red") +
  stat_poly_eq(formula = my.formula, 
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
               parse = TRUE,
               label.x =1.2,
               label.y =1.05) +
  geom_abline(intercept=1,slope=1, linetype=2) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  facet_wrap(~wavelength, 
             scales="free"
  ) 
## Warning: Removed 49668 rows containing non-finite values (stat_binhex).
## Warning: Removed 49668 rows containing non-finite values (stat_smooth).
## Warning: Removed 49668 rows containing non-finite values (stat_poly_eq).

kable(
fs_LND_scatter |> 
  select(-reflectance_LND04,-reflectance_LND08,-reflectance_LND09) |> 
  group_by(wavelength) |> 
  summarise(R2   = round((cor((reflectance_LND07), 
                            (reflectance_LND05),
                            use="complete.obs")^2),2),
            
            RMSE = round(sqrt(mean(((reflectance_LND07)-
                                      (reflectance_LND05))^2, na.rm=TRUE)),2),
            
            Bias = round((mean((reflectance_LND07), na.rm=TRUE) -
                            mean((reflectance_LND05), na.rm=TRUE)),2),
            
            MAE = round(mean(abs((reflectance_LND07)-
                                   (reflectance_LND05)), na.rm=TRUE),2))
) %>%
  kable_styling(full_width = T)
wavelength R2 RMSE Bias MAE
BLUE 0.05 3.65 -0.64 1.91
GREEN 0.10 3.45 -0.82 1.93
NIR 0.70 5.33 -1.93 3.21
RED 0.28 3.25 -0.29 1.74
SWIR1 0.50 4.20 -1.57 2.71
SWIR2 0.54 2.80 -0.01 1.50

Overall the correspondence between both Landsat 5/7 & 7/8 seems to be pretty good? R^2 values are around 90% and the MAE around a tick above 2%, which doesn’t seem to drastic? In both comparison cases LND07 seemed to have (very) slightly lower reluctance values. Notable is the occasional occurrence of extreme outlyers across all the visable wavelengths in the spectra. Given that polluted pixels such as snow and clouds should have been removed through the QAI filters I am unsure what could be the source of these observed deviations (especially since they are not consistent across wavelengths).

Some float from the ideal 1-to-1 line can be likely explained by the two day buffer in doy matching between pixels. The relative infrequence of these extreme outlyers could be explained by a major LULC event such as mowing or harvesting occurring exactly in this potential two day difference in observation date? Yet this does not then adress why the large deviance in reflectance would only be observed for blue,green,red pixels and not NIR+ ones…

test vis for tsi output

plot raw images

link <- "/data/Dagobah/greengrass/schnesha/03_RBF_interpolation_LND_1982-2021/X0060_Y0040/1984-2021_001-365_HL_TSA_LNDLG_NDV_TSI.tif"

tsi <- rast(link)

plot(tsi)

tsi_df <- as.data.frame(tsi)

save images and construct gif

for (i in c(1:#length(names(tsi))
            10)) {
  
  # Step 1: Call the pdf command to start the plot
  png(file = paste0("/data/Dagobah/greengrass/schnesha/03_RBF_interpolation_LND_1982-2021/ts_gif/plot_", i, ".png")) #,   # The directory you want to save the file in
   # width = 12, # The width of the plot in inches
    #height = 12) # The height of the plot in inches

  
  
plot(tsi[[i]])

# Step 3: Run dev.off() to create the file
dev.off()
  
  

}